#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Project: spd-sxmcc
"""
@author: lyndon
@time Created on 2018/11/28 17:05
@desc Parse a hard-coded HTML fragment with pyquery and print the text of each <li> element.
"""

from pyquery import PyQuery as pq

# Sample HTML fragment used as parser input: six <li> items, each starting
# with a <span> label followed by a value. The labels and some values are
# written as numeric character references (&#NNNNN;) that encode Chinese
# text; the last two items wrap their value in an <a> link. The
# whitespace-only lines are part of the literal.
html = '''<li><span>&#24314;&#31569;&#24180;&#20195;&#65306;</span>2008&#24180;</li>
<li><span>&#24314;&#31569;&#31867;&#22411;&#65306;</span>&#26495;&#22612;&#32467;&#21512;</li>
<li><span>&#27004;&#26635;&#24635;&#25968;&#65306;</span>41</li>
<li><span>&#25151;&#23627;&#24635;&#25968;&#65306;</span>6437&#25143;</li>
                                                                
<li><span>&#20108;&#25163;&#25151;&#28304;&#65306;</span><a href="/xq-ershoufang/358503/" target="_blank">164&#22871;</a></li>
                    
                                            
<li><span>&#31199;&#25151;&#25151;&#28304;&#65306;</span><a href="/xq-zufang/358503/" target="_blank">46&#22871;</a></li>'''

# Build a PyQuery document from the HTML fragment defined above.
doc = pq(html)
# NOTE (general pyquery selector reminder, not used below):
# class="item-0 active" is a single class attribute. If the selector keeps
# the space between the two names, pyquery reads it as "the a tag under an
# active tag under item-0", so the space has to be replaced with a dot.
print('--------------------------------------------')
# Select every <li>; .items() lets us walk the matches one element at a time.
item_a = doc('li').items()
for li_node in item_a:
    # Emit the element's text, then a separator line between entries.
    print(li_node.text())
    print('------------------------')
# Two ways to get an attribute value:
# print(item.attr.href)
# print(item.attr('href'))


# from pyquery import PyQuery as pyq
#
# html = '''<h3 xmlns="http://www.w3.org/1999/xhtml" class="listTit"><a href="/xiaoqu/358503.html" target="_blank">&#30495;&#27494;&#36335;&#24658;&#22823;&#32511;&#27954;&#19996;&#21306;(CBS)</a></h3>'''
#
# doc = pyq(html)
#
# print(doc)
#
# aa = doc('h3')
# print('----------')
# print(aa)
# print('----------')
